1use super::parser::*;
3use crate::ext::io::*;
4use crate::scripts::base::*;
5use crate::types::*;
6use crate::utils::encoding::{decode_to_string, encode_string};
7use anyhow::Result;
8use fancy_regex::Regex;
9use lazy_static::lazy_static;
10use std::collections::{BTreeMap, HashMap};
11
/// Factory that creates [`BGIScript`] instances through the `ScriptBuilder` trait.
///
/// The builder carries no state; `Default` is derived so it can be created
/// either with [`BGIScriptBuilder::new`] or with `Default::default()`.
#[derive(Debug, Default)]
pub struct BGIScriptBuilder {}

impl BGIScriptBuilder {
    /// Creates a new, stateless builder.
    pub fn new() -> Self {
        Self::default()
    }
}
22
23impl ScriptBuilder for BGIScriptBuilder {
24 fn default_encoding(&self) -> Encoding {
25 #[cfg(not(windows))]
26 return Encoding::Cp932;
27 #[cfg(windows)]
28 return Encoding::CodePage(932);
30 }
31
32 fn build_script(
33 &self,
34 buf: Vec<u8>,
35 _filename: &str,
36 encoding: Encoding,
37 _archive_encoding: Encoding,
38 config: &ExtraConfig,
39 _archive: Option<&Box<dyn Script>>,
40 ) -> Result<Box<dyn Script>> {
41 Ok(Box::new(BGIScript::new(buf, encoding, config)?))
42 }
43
44 fn extensions(&self) -> &'static [&'static str] {
45 &[]
46 }
47
48 fn script_type(&self) -> &'static ScriptType {
49 &ScriptType::BGI
50 }
51
52 fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option<u8> {
53 if buf_len > 28 && buf.starts_with(b"BurikoCompiledScriptVer1.00\0") {
54 return Some(255);
55 }
56 None
57 }
58}
59
/// A loaded BGI script together with the string references found by the
/// disassembler and the import options taken from `ExtraConfig`.
pub struct BGIScript {
    /// Raw bytes of the script file.
    data: MemReader,
    /// Encoding used to decode strings read from `data`.
    encoding: Encoding,
    /// String references collected by the V0/V1 parser.
    strings: Vec<BGIString>,
    /// `true` when the file starts with the `BurikoCompiledScriptVer1.00\0` header.
    is_v1: bool,
    /// `true` when the strings were produced by `V1Parser` (either because of the
    /// header or because `V0Parser` failed and `V1Parser` was used as a fallback).
    is_v1_instr: bool,
    /// Value added to every string address to obtain its position in `data`
    /// (taken from the V1 parser; `0` for files without the header).
    offset: usize,
    /// Copied from `config.bgi_import_duplicate`; when `false`, imports reuse an
    /// already written string for repeated addresses.
    import_duplicate: bool,
    /// Negation of `config.bgi_disable_append`; when `true`, imports first write the
    /// original data and then place new strings after it.
    append: bool,
    /// Copied from `config.custom_yaml`; selects YAML instead of JSON for custom export/import.
    custom_yaml: bool,
    /// Copied from `config.bgi_add_space`; when `true`, a trailing space is appended to
    /// every imported message that does not already end with one.
    add_space: bool,
}
73
74impl std::fmt::Debug for BGIScript {
75 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
76 f.debug_struct("BGIScript")
77 .field("encoding", &self.encoding)
78 .finish_non_exhaustive()
79 }
80}
81
82impl BGIScript {
83 pub fn new(data: Vec<u8>, encoding: Encoding, config: &ExtraConfig) -> Result<Self> {
89 let data = MemReader::new(data);
90 if data.data.starts_with(b"BurikoCompiledScriptVer1.00\0") {
91 let mut parser = V1Parser::new(data.to_ref(), encoding)?;
92 parser.disassemble()?;
93 let strings = parser.strings.clone();
94 let offset = parser.offset;
95 Ok(Self {
96 data,
97 encoding,
98 strings,
99 is_v1: true,
100 is_v1_instr: true,
101 offset,
102 import_duplicate: config.bgi_import_duplicate,
103 append: !config.bgi_disable_append,
104 custom_yaml: config.custom_yaml,
105 add_space: config.bgi_add_space,
106 })
107 } else {
108 let mut is_v1_instr = false;
109 let strings = {
110 let mut parser = V0Parser::new(data.to_ref());
111 match parser.disassemble() {
112 Ok(_) => parser.strings,
113 Err(_) => {
114 let mut parser = V1Parser::new(data.to_ref(), encoding)?;
115 parser.disassemble()?;
116 is_v1_instr = true;
117 parser.strings
118 }
119 }
120 };
121 Ok(Self {
122 data,
123 encoding,
124 strings,
125 is_v1: false,
126 is_v1_instr,
127 offset: 0,
128 import_duplicate: config.bgi_import_duplicate,
129 append: !config.bgi_disable_append,
130 custom_yaml: config.custom_yaml,
131 add_space: config.bgi_add_space,
132 })
133 }
134 }
135
136 fn read_string(&self, offset: usize) -> Result<String> {
137 let start = self.offset + offset;
138 let string_data = self.data.cpeek_cstring_at(start as u64)?;
139 let string = decode_to_string(self.encoding, string_data.as_bytes(), false)?;
141 Ok(string)
142 }
143
144 fn output_with_ruby(str: &mut String, ruby: &mut Vec<String>) -> Result<()> {
145 if ruby.is_empty() {
146 return Ok(());
147 }
148 if ruby.len() % 2 != 0 {
149 return Err(anyhow::anyhow!("Ruby strings count is not even."));
150 }
151 for i in (0..ruby.len()).step_by(2) {
152 let ruby_str = &ruby[i];
153 let ruby_text = &ruby[i + 1];
154 if ruby_str.is_empty() || ruby_text.is_empty() {
155 continue;
156 }
157 *str = str.replace(ruby_str, &format!("<r{ruby_text}>{ruby_str}</r>"));
158 }
159 ruby.clear();
160 Ok(())
161 }
162}
163
164impl Script for BGIScript {
165 fn default_output_script_type(&self) -> OutputScriptType {
166 OutputScriptType::Json
167 }
168
169 fn is_output_supported(&self, _: OutputScriptType) -> bool {
170 true
171 }
172
173 fn custom_output_extension<'a>(&'a self) -> &'a str {
174 if self.custom_yaml { "yaml" } else { "json" }
175 }
176
177 fn default_format_type(&self) -> FormatOptions {
178 if self.is_v1_instr {
179 FormatOptions::None
180 } else {
181 FormatOptions::Fixed {
182 length: 32,
183 keep_original: false,
184 break_words: false,
185 insert_fullwidth_space_at_line_start: true,
186 break_with_sentence: true,
187 #[cfg(feature = "jieba")]
188 break_chinese_words: true,
189 #[cfg(feature = "jieba")]
190 jieba_dict: None,
191 }
192 }
193 }
194
195 fn extract_messages(&self) -> Result<Vec<Message>> {
196 let mut messages = Vec::new();
197 let mut name = None;
198 let mut ruby = Vec::new();
199 for bgi_string in &self.strings {
200 match bgi_string.typ {
201 BGIStringType::Name => {
202 name = Some(self.read_string(bgi_string.address)?);
203 }
204 BGIStringType::Message => {
205 let mut message = self.read_string(bgi_string.address)?;
206 if !ruby.is_empty() {
207 Self::output_with_ruby(&mut message, &mut ruby)?;
208 }
209 messages.push(Message {
210 name: name.take(),
211 message: message,
212 });
213 }
214 BGIStringType::Ruby => {
215 let ruby_str = self.read_string(bgi_string.address)?;
216 ruby.push(ruby_str);
217 }
218 _ => {}
219 }
220 }
221 Ok(messages)
222 }
223
224 fn import_messages<'a>(
225 &'a self,
226 mut messages: Vec<Message>,
227 mut file: Box<dyn WriteSeek + 'a>,
228 _filename: &str,
229 encoding: Encoding,
230 replacement: Option<&'a ReplacementTable>,
231 ) -> Result<()> {
232 if self.add_space {
233 for mes in messages.iter_mut() {
234 if !mes.message.ends_with(' ') {
235 mes.message.push(' ');
236 }
237 }
238 }
239 if !self.import_duplicate {
240 let mut used = HashMap::new();
241 let mut extra = HashMap::new();
242 let mut mes = messages.iter_mut();
243 let mut cur_mes = mes.next();
244 let mut old_offset = 0;
245 let mut new_offset = 0;
246 let mut rubys = Vec::new();
247 let mut parsed_ruby = false;
248 if self.append {
249 file.write_all(&self.data.data)?;
250 new_offset = self.data.data.len();
251 }
252 for curs in &self.strings {
253 if !curs.is_internal() {
254 if cur_mes.is_none() {
255 cur_mes = mes.next();
256 }
257 }
258 if used.contains_key(&curs.address) && curs.is_internal() {
259 let (_, new_address) = used.get(&curs.address).unwrap();
260 file.write_u32_at(curs.offset as u64, *new_address as u32)?;
261 continue;
262 }
263 let nmes = match curs.typ {
264 BGIStringType::Internal => self.read_string(curs.address)?,
265 BGIStringType::Ruby => {
266 if !self.is_v1 && self.is_v1_instr {
267 if rubys.is_empty() {
268 if parsed_ruby {
269 String::from("<")
270 } else {
271 rubys = match &mut cur_mes {
272 Some(m) => parse_ruby_from_text(&mut m.message)?,
273 None => return Err(anyhow::anyhow!("No enough messages.")),
274 };
275 parsed_ruby = true;
276 if rubys.is_empty() {
277 String::from("<")
278 } else {
279 let ruby_str = rubys.remove(0);
280 ruby_str
281 }
282 }
283 } else {
284 rubys.remove(0)
285 }
286 } else {
287 self.read_string(curs.address)?
288 }
289 }
290 BGIStringType::Name => match &cur_mes {
291 Some(m) => {
292 if let Some(name) = &m.name {
293 let mut name = name.clone();
294 if let Some(replacement) = replacement {
295 for (key, value) in replacement.map.iter() {
296 name = name.replace(key, value);
297 }
298 }
299 name
300 } else {
301 return Err(anyhow::anyhow!("Name is missing for message."));
302 }
303 }
304 None => return Err(anyhow::anyhow!("No enough messages.")),
305 },
306 BGIStringType::Message => {
307 if !rubys.is_empty() {
308 eprintln!("Warning: Some ruby strings are unused: {:?}", rubys);
309 crate::COUNTER.inc_warning();
310 rubys.clear();
311 }
312 parsed_ruby = false;
313 let mes = match &cur_mes {
314 Some(m) => {
315 let mut message = m.message.clone();
316 if let Some(replacement) = replacement {
317 for (key, value) in replacement.map.iter() {
318 message = message.replace(key, value);
319 }
320 }
321 message
322 }
323 None => return Err(anyhow::anyhow!("No enough messages.")),
324 };
325 cur_mes.take();
326 mes
327 }
328 };
329 let in_used = match used.get(&curs.address) {
330 Some((s, address)) => {
331 if s == &nmes {
332 file.write_u32_at(curs.offset as u64, *address as u32)?;
333 continue;
334 }
335 if let Some(address) = extra.get(&nmes) {
336 file.write_u32_at(curs.offset as u64, *address as u32)?;
337 continue;
338 }
339 true
340 }
341 None => false,
342 };
343 let bgi_str_old_offset = curs.address + self.offset;
344 if !self.append && old_offset < bgi_str_old_offset {
345 file.write_all(&self.data.data[old_offset..bgi_str_old_offset])?;
346 new_offset += bgi_str_old_offset - old_offset;
347 old_offset = bgi_str_old_offset;
348 }
349 let old_str_len = self
350 .data
351 .cpeek_cstring_at(bgi_str_old_offset as u64)?
352 .as_bytes_with_nul()
353 .len();
354 let nmess = encode_string(encoding, &nmes, false)?;
355 let write_to_original = self.append && !in_used && nmess.len() + 1 <= old_str_len;
356 if write_to_original {
357 file.write_all_at(bgi_str_old_offset as u64, &nmess)?;
358 file.write_u8_at(bgi_str_old_offset as u64 + nmess.len() as u64, 0)?; } else {
360 file.write_all(&nmess)?;
361 file.write_u8(0)?; }
363 let new_address = if write_to_original {
364 bgi_str_old_offset - self.offset
365 } else {
366 new_offset - self.offset
367 };
368 file.write_u32_at(curs.offset as u64, new_address as u32)?;
369 if in_used {
370 extra.insert(nmes, new_address);
371 } else {
372 used.insert(curs.address, (nmes, new_address));
373 }
374 old_offset += old_str_len;
375 if !write_to_original {
376 new_offset += nmess.len() + 1; }
378 }
379 if cur_mes.is_some() || mes.next().is_some() {
380 return Err(anyhow::anyhow!("Some messages were not processed."));
381 }
382 if !self.append && old_offset < self.data.data.len() {
383 file.write_all(&self.data.data[old_offset..])?;
384 }
385 return Ok(());
386 }
387 let mut mes = messages.iter_mut();
388 let mut cur_mes = None;
389 let mut strs = self.strings.iter();
390 let mut nstrs = Vec::new();
391 let mut cur_str = strs.next();
392 let mut old_offset = 0;
393 let mut new_offset = 0;
394 let mut rubys = Vec::new();
395 let mut parsed_ruby = false;
396 if self.append {
397 file.write_all(&self.data.data)?;
398 new_offset = self.data.data.len();
399 }
400 while let Some(curs) = cur_str {
401 if !curs.is_internal() {
402 if cur_mes.is_none() {
403 cur_mes = mes.next();
404 }
405 }
406 let bgi_str_old_offset = curs.address + self.offset;
407 if !self.append && old_offset < bgi_str_old_offset {
408 file.write_all(&self.data.data[old_offset..bgi_str_old_offset])?;
409 new_offset += bgi_str_old_offset - old_offset;
410 old_offset = bgi_str_old_offset;
411 }
412 let old_str_len = self
413 .data
414 .cpeek_cstring_at((curs.address + self.offset) as u64)?
415 .as_bytes_with_nul()
416 .len();
417 let nmes = match curs.typ {
418 BGIStringType::Internal => self.read_string(curs.address)?,
419 BGIStringType::Ruby => {
420 if !self.is_v1 && self.is_v1_instr {
421 if rubys.is_empty() {
422 if parsed_ruby {
423 String::from("<")
424 } else {
425 rubys = match &mut cur_mes {
426 Some(m) => parse_ruby_from_text(&mut m.message)?,
427 None => return Err(anyhow::anyhow!("No enough messages.")),
428 };
429 parsed_ruby = true;
430 if rubys.is_empty() {
431 String::from("<")
432 } else {
433 let ruby_str = rubys.remove(0);
434 ruby_str
435 }
436 }
437 } else {
438 rubys.remove(0)
439 }
440 } else {
441 self.read_string(curs.address)?
442 }
443 }
444 BGIStringType::Name => match &cur_mes {
445 Some(m) => {
446 if let Some(name) = &m.name {
447 let mut name = name.clone();
448 if let Some(replacement) = replacement {
449 for (key, value) in replacement.map.iter() {
450 name = name.replace(key, value);
451 }
452 }
453 name
454 } else {
455 return Err(anyhow::anyhow!("Name is missing for message."));
456 }
457 }
458 None => return Err(anyhow::anyhow!("No enough messages.")),
459 },
460 BGIStringType::Message => {
461 if !rubys.is_empty() {
462 eprintln!("Warning: Some ruby strings are unused: {:?}", rubys);
463 crate::COUNTER.inc_warning();
464 rubys.clear();
465 }
466 parsed_ruby = false;
467 let mes = match &cur_mes {
468 Some(m) => {
469 let mut message = m.message.clone();
470 if let Some(replacement) = replacement {
471 for (key, value) in replacement.map.iter() {
472 message = message.replace(key, value);
473 }
474 }
475 message
476 }
477 None => return Err(anyhow::anyhow!("No enough messages.")),
478 };
479 cur_mes.take();
480 mes
481 }
482 };
483 let nmes = encode_string(encoding, &nmes, false)?;
484 file.write_all(&nmes)?;
485 file.write_u8(0)?;
486 let new_str_len = nmes.len() + 1; let new_address = new_offset - self.offset;
488 nstrs.push(BGIString {
489 offset: curs.offset,
490 address: new_address,
491 typ: curs.typ.clone(),
492 });
493 old_offset += old_str_len;
494 new_offset += new_str_len;
495 cur_str = strs.next();
496 }
497 if cur_mes.is_some() || mes.next().is_some() {
498 return Err(anyhow::anyhow!("Some messages were not processed."));
499 }
500 for str in nstrs {
501 file.write_u32_at(str.offset as u64, str.address as u32)?;
502 }
503 if !self.append && old_offset < self.data.data.len() {
504 file.write_all(&self.data.data[old_offset..])?;
505 }
506 Ok(())
507 }
508
509 fn custom_export(&self, filename: &std::path::Path, encoding: Encoding) -> Result<()> {
510 let mut strs = Vec::with_capacity(self.strings.len());
511 for s in &self.strings {
512 let string = self.read_string(s.address)?;
513 strs.push(string);
514 }
515 let data = if self.custom_yaml {
516 serde_yaml_ng::to_string(&strs)
517 .map_err(|e| anyhow::anyhow!("Failed to serialize to YAML: {}", e))?
518 } else {
519 serde_json::to_string_pretty(&strs)
520 .map_err(|e| anyhow::anyhow!("Failed to serialize to JSON: {}", e))?
521 };
522 let data = encode_string(encoding, &data, false)?;
523 let mut writer = crate::utils::files::write_file(filename)?;
524 writer.write_all(&data)?;
525 writer.flush()?;
526 Ok(())
527 }
528
529 fn custom_import<'a>(
530 &'a self,
531 custom_filename: &'a str,
532 mut file: Box<dyn WriteSeek + 'a>,
533 encoding: Encoding,
534 output_encoding: Encoding,
535 ) -> Result<()> {
536 let output = crate::utils::files::read_file(custom_filename)?;
537 let s = decode_to_string(output_encoding, &output, true)?;
538 let strs: Vec<String> = if self.custom_yaml {
539 serde_yaml_ng::from_str(&s)
540 .map_err(|e| anyhow::anyhow!("Failed to parse YAML: {}", e))?
541 } else {
542 serde_json::from_str(&s).map_err(|e| anyhow::anyhow!("Failed to parse JSON: {}", e))?
543 };
544 if strs.len() != self.strings.len() {
545 return Err(anyhow::anyhow!(
546 "The number of strings in the imported file ({}) does not match the original ({})",
547 strs.len(),
548 self.strings.len()
549 ));
550 }
551 if !self.import_duplicate {
552 let mut used = HashMap::new();
553 let mut extra = HashMap::new();
554 let mut mes = strs.iter();
555 let mut cur_str = mes.next();
556 let mut old_offset = 0;
557 let mut new_offset = 0;
558 if self.append {
559 file.write_all(&self.data.data)?;
560 new_offset = self.data.data.len();
561 }
562 for curs in &self.strings {
563 let nmes = match cur_str {
564 Some(s) => s,
565 None => return Err(anyhow::anyhow!("No enough strings.")),
566 };
567 cur_str = mes.next();
568 let in_used = match used.get(&curs.address) {
569 Some((s, address)) => {
570 if s == &nmes {
571 file.write_u32_at(curs.offset as u64, *address as u32)?;
572 continue;
573 }
574 if let Some(address) = extra.get(nmes) {
575 file.write_u32_at(curs.offset as u64, *address as u32)?;
576 continue;
577 }
578 true
579 }
580 None => false,
581 };
582 let bgi_str_old_offset = curs.address + self.offset;
583 if !self.append && old_offset < bgi_str_old_offset {
584 file.write_all(&self.data.data[old_offset..bgi_str_old_offset])?;
585 new_offset += bgi_str_old_offset - old_offset;
586 old_offset = bgi_str_old_offset;
587 }
588 let old_str_len = self
589 .data
590 .cpeek_cstring_at(bgi_str_old_offset as u64)?
591 .as_bytes_with_nul()
592 .len();
593 let nmess = encode_string(encoding, nmes, false)?;
594 let write_to_original = self.append && !in_used && nmess.len() + 1 <= old_str_len;
595 if write_to_original {
596 file.write_all_at(bgi_str_old_offset as u64, &nmess)?;
597 file.write_u8_at(bgi_str_old_offset as u64 + nmess.len() as u64, 0)?; } else {
599 file.write_all(&nmess)?;
600 file.write_u8(0)?; }
602 let new_address = if write_to_original {
603 bgi_str_old_offset - self.offset
604 } else {
605 new_offset - self.offset
606 };
607 file.write_u32_at(curs.offset as u64, new_address as u32)?;
608 if in_used {
609 extra.insert(nmes, new_address);
610 } else {
611 used.insert(curs.address, (nmes, new_address));
612 }
613 old_offset += old_str_len;
614 if !write_to_original {
615 new_offset += nmess.len() + 1; }
617 }
618 if cur_str.is_some() || mes.next().is_some() {
619 return Err(anyhow::anyhow!("Some strings were not processed."));
620 }
621 if !self.append && old_offset < self.data.data.len() {
622 file.write_all(&self.data.data[old_offset..])?;
623 }
624 return Ok(());
625 }
626 let mut mes = strs.iter();
627 let mut cur_mes = mes.next();
628 let mut strs = self.strings.iter();
629 let mut nstrs = Vec::new();
630 let mut cur_str = strs.next();
631 let mut old_offset = 0;
632 let mut new_offset = 0;
633 if self.append {
634 file.write_all(&self.data.data)?;
635 new_offset = self.data.data.len();
636 }
637 while let Some(curs) = cur_str {
638 let bgi_str_old_offset = curs.address + self.offset;
639 if !self.append && old_offset < bgi_str_old_offset {
640 file.write_all(&self.data.data[old_offset..bgi_str_old_offset])?;
641 new_offset += bgi_str_old_offset - old_offset;
642 old_offset = bgi_str_old_offset;
643 }
644 let old_str_len = self
645 .data
646 .cpeek_cstring_at((curs.address + self.offset) as u64)?
647 .as_bytes_with_nul()
648 .len();
649 let nmes = match cur_mes {
650 Some(s) => s,
651 None => return Err(anyhow::anyhow!("No enough strings.")),
652 };
653 cur_mes = mes.next();
654 let nmes = encode_string(encoding, nmes, false)?;
655 file.write_all(&nmes)?;
656 file.write_u8(0)?;
657 let new_str_len = nmes.len() + 1; let new_address = new_offset - self.offset;
659 nstrs.push(BGIString {
660 offset: curs.offset,
661 address: new_address,
662 typ: curs.typ.clone(),
663 });
664 old_offset += old_str_len;
665 new_offset += new_str_len;
666 cur_str = strs.next();
667 }
668 if cur_mes.is_some() || mes.next().is_some() {
669 return Err(anyhow::anyhow!("Some strings were not processed."));
670 }
671 for str in nstrs {
672 file.write_u32_at(str.offset as u64, str.address as u32)?;
673 }
674 if !self.append && old_offset < self.data.data.len() {
675 file.write_all(&self.data.data[old_offset..])?;
676 }
677 Ok(())
678 }
679}
680
lazy_static! {
    // Matches inline ruby markup of the form `<rTEXT>BASE</r>`: capture group 1 is
    // everything after `<r` up to the first `>` (the ruby text), capture group 2 is
    // everything up to the next `<` (the base string). Both groups are non-empty.
    static ref RUBY_REGEX: Regex = Regex::new(r"<r([^>]+)>([^<]+)</r>").unwrap();
}
684
685fn parse_ruby_from_text(text: &mut String) -> Result<Vec<String>> {
686 let mut map = BTreeMap::new();
687 for i in RUBY_REGEX.captures_iter(&text) {
688 let i = i?;
689 let ruby_text = i.get(1).map_or("", |m| m.as_str());
690 let ruby_str = i.get(2).map_or("", |m| m.as_str());
691 if !ruby_text.is_empty() && !ruby_str.is_empty() {
692 map.insert(ruby_str.to_owned(), ruby_text.to_owned());
693 }
694 }
695 let mut result = Vec::new();
696 for (ruby_str, ruby_text) in map {
697 *text = text.replace(&format!("<r{ruby_text}>{ruby_str}</r>"), &ruby_str);
698 result.push(ruby_str);
699 result.push(ruby_text);
700 }
701 Ok(result)
702}
703
/// Checks that ruby markup is stripped from the text and that the pairs are
/// returned as `base, ruby text`, ordered by base string.
#[test]
fn test_parse_ruby_from_text() {
    let input = "This is a test <rRubyText>RubyString</r> and <rAnotherText>AnotherRuby</r>.";
    let mut text = input.to_string();
    let ruby = parse_ruby_from_text(&mut text).unwrap();
    assert_eq!(text, "This is a test RubyString and AnotherRuby.");
    let expected: Vec<String> = ["AnotherRuby", "AnotherText", "RubyString", "RubyText"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    assert_eq!(ruby, expected);
}